{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c861645d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  WARNING: The script isympy.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The scripts f2py.exe and numpy-config.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The script normalizer.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The script tqdm.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The scripts torchfrtrace.exe and torchrun.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The scripts hf.exe, huggingface-cli.exe and tiny-agents.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
      "  WARNING: The scripts accelerate-config.exe, accelerate-estimate-memory.exe, accelerate-launch.exe, accelerate-merge-weights.exe and accelerate.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n"
     ]
    }
   ],
   "source": [
    "!pip install -q --upgrade bitsandbytes accelerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ba0f9487",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "import threading\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI\n",
    "from huggingface_hub import login\n",
    "from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig\n",
    "import torch\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "70cc41a4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
     ]
    }
   ],
   "source": [
    "load_dotenv(override=True)\n",
    "hf_token = os.getenv('HF_TOKEN')\n",
    "login(hf_token, add_to_git_credential=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a197a483",
   "metadata": {},
   "outputs": [
    {
     "ename": "PackageNotFoundError",
     "evalue": "No package metadata was found for bitsandbytes",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mStopIteration\u001b[39m                             Traceback (most recent call last)",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:397\u001b[39m, in \u001b[36mDistribution.from_name\u001b[39m\u001b[34m(cls, name)\u001b[39m\n\u001b[32m    396\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m397\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdiscover\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    398\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n",
      "\u001b[31mStopIteration\u001b[39m: ",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[31mPackageNotFoundError\u001b[39m                      Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;43;01mclass\u001b[39;49;00m\u001b[38;5;250;43m \u001b[39;49m\u001b[34;43;01mGenerateMinute\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[32m      2\u001b[39m \u001b[43m  \u001b[49m\u001b[43maudio_model\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mopenai/whisper-medium.en\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m      3\u001b[39m \u001b[43m  \u001b[49m\u001b[43mllm_model\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 4\u001b[39m, in \u001b[36mGenerateMinute\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m      2\u001b[39m audio_model = \u001b[33m\"\u001b[39m\u001b[33mopenai/whisper-medium.en\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m      3\u001b[39m llm_model = \u001b[33m\"\u001b[39m\u001b[33mmeta-llama/Llama-3.2-3B-Instruct\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m quant_config = \u001b[43mBitsAndBytesConfig\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m      5\u001b[39m \u001b[43m    \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m      6\u001b[39m \u001b[43m    \u001b[49m\u001b[43mbnb_4bit_use_double_quant\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m      7\u001b[39m \u001b[43m    \u001b[49m\u001b[43mbnb_4bit_compute_dtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtorch\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m      8\u001b[39m \u001b[43m    \u001b[49m\u001b[43mbnb_4bit_quant_type\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mnf4\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m      9\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     11\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, progress, audio_model=audio_model, llm_model=llm_model):\n\u001b[32m     12\u001b[39m   \u001b[38;5;28mself\u001b[39m.progress = progress\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\hp\\projects\\gen-ai\\llm_engineering\\.venv\\Lib\\site-packages\\transformers\\utils\\quantization_config.py:510\u001b[39m, in \u001b[36mBitsAndBytesConfig.__init__\u001b[39m\u001b[34m(self, load_in_8bit, load_in_4bit, llm_int8_threshold, llm_int8_skip_modules, llm_int8_enable_fp32_cpu_offload, llm_int8_has_fp16_weight, bnb_4bit_compute_dtype, bnb_4bit_quant_type, bnb_4bit_use_double_quant, bnb_4bit_quant_storage, **kwargs)\u001b[39m\n\u001b[32m    507\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m kwargs:\n\u001b[32m    508\u001b[39m     logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUnused kwargs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(kwargs.keys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. These kwargs are not used in \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.\u001b[34m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m510\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mpost_init\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\hp\\projects\\gen-ai\\llm_engineering\\.venv\\Lib\\site-packages\\transformers\\utils\\quantization_config.py:568\u001b[39m, in \u001b[36mBitsAndBytesConfig.post_init\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    565\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m.bnb_4bit_use_double_quant, \u001b[38;5;28mbool\u001b[39m):\n\u001b[32m    566\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mbnb_4bit_use_double_quant must be a boolean\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m568\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.load_in_4bit \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m version.parse(\u001b[43mimportlib\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m.\u001b[49m\u001b[43mversion\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mbitsandbytes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m) >= version.parse(\n\u001b[32m    569\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33m0.39.0\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    570\u001b[39m ):\n\u001b[32m    571\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m    572\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33m4 bit quantization requires bitsandbytes>=0.39.0 - please upgrade your bitsandbytes version\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    573\u001b[39m     )\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:889\u001b[39m, in \u001b[36mversion\u001b[39m\u001b[34m(distribution_name)\u001b[39m\n\u001b[32m    882\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mversion\u001b[39m(distribution_name):\n\u001b[32m    883\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Get the version string for the named package.\u001b[39;00m\n\u001b[32m    884\u001b[39m \n\u001b[32m    885\u001b[39m \u001b[33;03m    :param distribution_name: The name of the distribution package to query.\u001b[39;00m\n\u001b[32m    886\u001b[39m \u001b[33;03m    :return: The version string for the package as defined in the package's\u001b[39;00m\n\u001b[32m    887\u001b[39m \u001b[33;03m        \"Version\" metadata key.\u001b[39;00m\n\u001b[32m    888\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m889\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdistribution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdistribution_name\u001b[49m\u001b[43m)\u001b[49m.version\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:862\u001b[39m, in \u001b[36mdistribution\u001b[39m\u001b[34m(distribution_name)\u001b[39m\n\u001b[32m    856\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdistribution\u001b[39m(distribution_name):\n\u001b[32m    857\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Get the ``Distribution`` instance for the named package.\u001b[39;00m\n\u001b[32m    858\u001b[39m \n\u001b[32m    859\u001b[39m \u001b[33;03m    :param distribution_name: The name of the distribution package as a string.\u001b[39;00m\n\u001b[32m    860\u001b[39m \u001b[33;03m    :return: A ``Distribution`` instance (or subclass thereof).\u001b[39;00m\n\u001b[32m    861\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m862\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDistribution\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_name\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdistribution_name\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:399\u001b[39m, in \u001b[36mDistribution.from_name\u001b[39m\u001b[34m(cls, name)\u001b[39m\n\u001b[32m    397\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mcls\u001b[39m.discover(name=name))\n\u001b[32m    398\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m399\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m PackageNotFoundError(name)\n",
      "\u001b[31mPackageNotFoundError\u001b[39m: No package metadata was found for bitsandbytes"
     ]
    }
   ],
   "source": [
    "class GenerateMinute:\n",
    "  audio_model = \"openai/whisper-medium.en\"\n",
    "  llm_model = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
    "  quant_config = BitsAndBytesConfig(\n",
    "      load_in_4bit=True,\n",
    "      bnb_4bit_use_double_quant=True,\n",
    "      bnb_4bit_compute_dtype=torch.bfloat16,\n",
    "      bnb_4bit_quant_type=\"nf4\"\n",
    "  )\n",
    "\n",
    "  def __init__(self, progress, audio_model=audio_model, llm_model=llm_model):\n",
    "    self.progress = progress\n",
    "    self.audio_model = audio_model\n",
    "    self.llm_model = llm_model\n",
    "    self.tokenizer = AutoTokenizer.from_pretrained(self.llm_model)\n",
    "    self.tokenizer.pad_token = self.tokenizer.eos_token\n",
    "    self.model = AutoModelForCausalLM.from_pretrained(\n",
    "        self.llm_model, quantization_config=self.quant_config, device_map=\"auto\"\n",
    "      )\n",
    "    \n",
    "  def audio_to_text(self, audio_filepath):\n",
    "    self.progress(0.4, desc=\"Transcribing audio...\")\n",
    "    try:\n",
    "      if audio_filepath is None:\n",
    "        raise ValueError(\"No audio file provided\")\n",
    "        \n",
    "      if not os.path.exists(audio_filepath):\n",
    "        raise ValueError(\"Audio file not found: {file_path}\")\n",
    "\n",
    "      pipe = pipeline(\n",
    "          \"automatic-speech-recognition\",\n",
    "          model=self.audio_model,\n",
    "          chunk_length_s=30,\n",
    "          device=\"cuda\",\n",
    "          return_timestamps=True\n",
    "      )\n",
    "\n",
    "      response = pipe(audio_filepath)\n",
    "\n",
    "      text = response.strip()\n",
    "\n",
    "      if not text:\n",
    "        raise ValueError(\"No speech detected in audio\")\n",
    "\n",
    "      return text\n",
    "\n",
    "    except Exception as e:\n",
    "      raise ValueError(e)\n",
    "\n",
    "  def create_minute(self, transcription):\n",
    "    self.progress(0.7, desc=\"Generating meeting minutes...\")\n",
    "\n",
    "    system_message = \"\"\"\n",
    "    You produce minutes of meetings from transcripts, with summary, key discussion points,\n",
    "    takeaways and action items with owners, in markdown format without code blocks.\n",
    "    \"\"\"\n",
    "\n",
    "    user_prompt = f\"\"\"\n",
    "    Below is an extract transcript of a Denver council meeting.\n",
    "    Please write minutes in markdown without code blocks, including:\n",
    "    - a summary with attendees, location and date\n",
    "    - discussion points\n",
    "    - takeaways\n",
    "    - action items with owners\n",
    "\n",
    "    Transcription:\n",
    "    {transcription}\n",
    "    \"\"\"\n",
    "\n",
    "    messages = [\n",
    "      {\"role\": \"system\", \"content\": system_message},\n",
    "      {\"role\": \"user\", \"content\": user_prompt}\n",
    "    ]\n",
    "\n",
    "    inputs = self.tokenizer(messages, return_tensors=\"pt\").to(self.model.device)\n",
    "    streamer = TextIteratorStreamer(self.tokenizer)\n",
    "\n",
    "    thread = threading.Thread(\n",
    "        target=self.model.generate, \n",
    "        kwargs={\n",
    "          \"input_ids\": inputs,\n",
    "          \"max_new_tokens\": 2000,\n",
    "          \"streamer\": streamer\n",
    "        }\n",
    "    )\n",
    "\n",
    "    thread.start()\n",
    "    started = False\n",
    "\n",
    "    for new_text in streamer:\n",
    "      if not started:\n",
    "        if \"<|start_header_id|>assistant<|end_header_id|>\" in new_text:\n",
    "            started = True\n",
    "            new_text = new_text.split(\"<|start_header_id|>assistant<|end_header_id|>\")[-1].strip()\n",
    "\n",
    "      if started:\n",
    "        if \"<|eot_id|>\" in new_text:\n",
    "            new_text = new_text.replace(\"<|eot_id|>\", \"\")  # Remove the unwanted token\n",
    "\n",
    "        if new_text.strip():  # Only yield non-empty chunks\n",
    "            yield new_text\n",
    "\n",
    "  def process_meeting(self, audio_filepath, audio_model, llm_model ):\n",
    "    self.audio_model = audio_model\n",
    "    self.llm_model = llm_model\n",
    "    self.progress(0.2, desc=\"Processing audio file...\")\n",
    "    try:\n",
    "      transcription = self.audio_to_text(audio_filepath)\n",
    "      minute = self.create_minute(transcription)\n",
    "\n",
    "      response = \"\"\n",
    "\n",
    "      for chunk in minute:\n",
    "        response += chunk\n",
    "        yield response\n",
    "\n",
    "    except Exception as e:\n",
    "      yield f\"Error processing meeting: {e}\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
